/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The impact of divorce laws on the equilibrium in the marriage market.
% Ana Reynoso
% April 2024
%
% This file inputs the cleaned PSID data and creates the relevant variables  
% to construct moments.
%
% Data: PSID 1968-1992
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
*/

*%------------------------ Replication Path -------------------------------%

clear all
clear
set mem 1000M
set maxvar 15000

*%--- Indicate location of Replication folder:
*global replication_location "C:\update_with_your_path"

*%------------------------ Preliminaries ----------------------------------%

cd "$replication_location\Data\Inputs"
u "households_psid.dta", clear

qui{
* Use year of survey to be consistent with reduced form evidence
replace year=year+1 if year!=. 
sort state year

* Keep only MCD states
drop unilateral
gen unilateral=0
replace unilateral=1 if state==1 & year>=1971
replace unilateral=1 if state==2 & year>=1973
replace unilateral=1 if state==4 & year>=1970
replace unilateral=1 if state==5 & year>=1972
replace unilateral=1 if state==6 & year>=1973
replace unilateral=1 if state==7 & year>=1968
replace unilateral=1 if state==9 & year>=1971
replace unilateral=1 if state==10 & year>=1973
replace unilateral=1 if state==11 & year>=1971
replace unilateral=1 if state==13 & year>=1973
replace unilateral=1 if state==14 & year>=1970
replace unilateral=1 if state==15 & year>=1969
replace unilateral=1 if state==16 & year>=1972
replace unilateral=1 if state==18 & year>=1973
replace unilateral=1 if state==20 & year>=1975
replace unilateral=1 if state==21 & year>=1972
replace unilateral=1 if state==22 & year>=1974
replace unilateral=1 if state==25 & year>=1973
replace unilateral=1 if state==26 & year>=1972
replace unilateral=1 if state==27 & year>=1967
replace unilateral=1 if state==28 & year>=1971
replace unilateral=1 if state==30 
replace unilateral=1 if state==33 & year>=1971
replace unilateral=1 if state==35
replace unilateral=1 if state==36 & year>=1971
replace unilateral=1 if state==38 & year>=1975
replace unilateral=1 if state==42 & year>=1970
replace unilateral=1 if state==46 & year>=1973
replace unilateral=1 if state==48 & year>=1978
replace unilateral=1 if state==49 & year>=1977
replace unilateral=1 if state==50
replace unilateral=1 if state==51 & year>=1972

replace state=. if state==0 | state==99
replace unilateral=. if state==.
label var unilateral "Unilateral"
bysort state: egen maxud=max(unilateral)

drop if maxud==1 | state==.

* Define marriage markets
gen USCBregion=.
replace USCBregion = 1 if state==44 | state==29 | state==31 | state==37
replace USCBregion = 2 if state==12 | state==34 | state==24 | state==40 | state==43
replace USCBregion = 3 if state==8 | state==19 | state==32 | state==39 | state==45 | state==47 
replace USCBregion = 4 if state==23 | state==41 | state==3 | state==17

* Select individuals that are observed getting married in the data or single
*- Create a person identifier
gen woman=.
replace woman=person if gender==2
replace woman=s_person if s_gender==2
gen man=.
replace man=person if gender==1
replace man=s_person if s_gender==1
gen sample_person=.
replace sample_person= person if sample_member==1 
replace sample_person=s_person if sample_member==0 
*- year of first marriage for a sample person
gen year_firstmarriage_sample=year_firstmarriage if sample_member==1
replace year_firstmarriage_sample=s_year_firstmarriage if sample_member==0
replace year_firstmarriage_sample=-9999 if year_firstmarriage_sample>=9998
*- year the person is observed for the first time
egen male_min_year=min(year), by(man)
egen female_min_year=min(year), by(woman)
*- determine if a sample individual is seen getting married
gen male_see_mar=(year_firstmarriage_sample>=male_min_year)
replace male_see_mar=1 if year_firstmarriage_sample==male_min_year-1 | year_firstmarriage_sample==male_min_year-2 | year_firstmarriage_sample==male_min_year-3 
gen female_see_mar=(year_firstmarriage_sample>=female_min_year)
replace female_see_mar=1 if year_firstmarriage_sample==female_min_year-1 | year_firstmarriage_sample==female_min_year-2 | year_firstmarriage_sample==female_min_year-3
*- identify single individuals
replace marit=. if marit>=8
bysort woman: egen female_min_marit=min(marit)
bysort man: egen male_min_marit=min(marit)
gen single=.
replace single=1 if female_min_marit==2 | male_min_marit==2
bysort person: egen head_min_age=min(age)
keep if female_see_mar==1 | male_see_mar==1 | single==1 | head_min_age<=30 
*- Identify households (irrespective of changes in composition of members)
gen hh_id=. 
*- sort and order
sort sample_person year
order sample_person year state man woman marit seqnum s_seqnum sample_member s_sample_member
*- determine if household has two sample members and marital status changes
gen two_sample_mem=0
replace two_sample_mem=1 if sample_member==1 & s_sample_member==1
sort sample_person year         
bysort sample_person: gen obs=_n 
bysort sample_person: egen max_two_sample_mem=max(two_sample_mem)
bysort sample_person: egen maxdi=max(marit)
*- if household never has two sample members
replace hh_id=sample_person if max_two_sample_mem==0 
*- if household ever has two sample members but never splits up
replace hh_id=sample_person if maxdi<=2
*- if household ever has two sample members and splits up at some point
*-- conflicting head sample member (sample_person of hh that has two sample persons and is seen splitting off)
gen conflict_head=.
replace conflict_head=1 if max_two_sample_mem==1&maxdi>=3
gen conflict_head_id=sample_person if conflict_head==1
*-- determine if a woman was ever married to a conflict_head
bysort woman: egen married_conflict=max(conflict_head)
replace married_conflict=. if woman==.
*-- link households with a conflicting sample member by their id67 and id of conflict sample member
bysort id67: egen id_conflict=max(conflict_head_id)
replace hh_id=id_conflict if conflict_head==1 | married_conflict==1

* Households in the data
drop obs
sort hh_id year          
bysort hh_id: gen obs=_n 
order hh_id year obs
drop if hh_id==. 
bysort hh_id: egen max_marit=max(marit)
bysort hh_id: egen min_marit=min(marit)
gen couple=0
replace couple=1 if single==.
drop if max_marit>2 & couple==0
drop if min_marit<2 & couple==0
drop if nbr_marriages>=1 & nbr_marriages<=4 & couple==0 
bysort hh_id: gen Nbr_obs=_N //
*- ever divorced?
gen ever_divorced=0 if couple==1
replace ever_divorced=1 if (max_marit==4 | max_marit==5) & couple==1
*- year first divorce
gen year_divorce=.
replace year_divorce=year if marit==4| marit==5
bysort hh_id: egen min_yeardiv=min(year_divorce)
gen couple_divorces=.
replace couple_divorces=1 if year==min_yeardiv 
*- dummy indicating first marriage in family 
gen first_marriage=.
replace first_marriage=1 if year<min_yeardiv & couple==1
replace first_marriage=1 if ever_divorced==0 & couple==1
*- year first marriage
gen year_marriage=.
replace year_marriage=year if marit==1
bysort hh_id: egen min_yearmarr=min(year_marriage)
gen couple_marries=.
replace couple_marries=1 if year==min_yearmarr
*- couples at the moment of marriage
count if couple==1 & first_marr==1 & couple_marries==1 
*- educ female and male at moment of marriage
gen femeduc=.
replace femeduc=heduc if gender==2
replace femeduc=weduc if s_gender==2
replace femeduc=. if femeduc==9
gen femeduc_cat=.
replace femeduc_cat=1 if femeduc<=4 & femeduc!=.  
replace femeduc_cat=2 if femeduc==5 | femeduc==6 
replace femeduc_cat=3 if femeduc>=7 & femeduc!=. 
bysort woman: egen max_femeduc_cat=max(femeduc_cat)
bysort woman: egen min_femeduc_cat=min(femeduc_cat)
gen fem_inihk=.
replace fem_inihk=femeduc_cat if couple_marries==1
replace fem_inihk=min_femeduc_cat if fem_inihk==. & couple_marries==1
gen female_type=fem_inihk
replace female_type=max_femeduc_cat if single==1 & gender==2 & obs==1
gen maleduc=.
replace maleduc=heduc if gender==1
replace maleduc=weduc if s_gender==1
replace maleduc=. if maleduc==9
gen maleduc_cat=.
replace maleduc_cat=1 if maleduc<=4 & maleduc!=. 
replace maleduc_cat=2 if maleduc==5 | maleduc==6 
replace maleduc_cat=3 if maleduc>=7 & maleduc!=. 
bysort man: egen max_maleduc_cat=max(maleduc_cat)
bysort man: egen min_maleduc_cat=min(maleduc_cat)
gen male_inihk=.
replace male_inihk=maleduc_cat if couple_marries==1
replace male_inihk=min_maleduc_cat if male_inihk==. & couple_marries==1
gen male_type=male_inihk
replace male_type=max_maleduc_cat if single==1 & gender==1 & obs==1
*- age groups
gen femage=.
replace femage=age if gender==2
replace femage=s_age if s_gender==2
replace femage=. if femage==999
gen fem_age=0
replace fem_age=1 if femage<=25
replace fem_age=2 if femage>=26 & femage<=28
replace fem_age=3 if femage>=29 & femage<=31
replace fem_age=4 if femage>=32 & femage<=34
replace fem_age=5 if femage>=35 & femage<=37
replace fem_age=6 if femage>=38 & femage<=40
replace fem_age=7 if femage>=41 & femage<=43
replace fem_age=8 if femage>=44 & femage<=46
replace fem_age=9 if femage>=47 & femage<=49
replace fem_age=10 if femage>=50 
replace fem_age=. if femage==.
gen maleage=.
replace maleage=age if gender==1
replace maleage=s_age if s_gender==1
replace maleage=. if maleage==999
gen male_age=0
replace male_age=1 if maleage<=25
replace male_age=2 if maleage>=26 & maleage<=28
replace male_age=3 if maleage>=29 & maleage<=31
replace male_age=4 if maleage>=32 & maleage<=34
replace male_age=5 if maleage>=35 & maleage<=37
replace male_age=6 if maleage>=38 & maleage<=40
replace male_age=7 if maleage>=41 & maleage<=43
replace male_age=8 if maleage>=44 & maleage<=46
replace male_age=9 if maleage>=47 & maleage<=49
replace male_age=10 if maleage>=50 
replace male_age=. if maleage==.
gen hh_age=male_age
replace hh_age=fem_age if single==1 & gender==2
gen hh_intval=.
replace hh_intval=1 if  hh_age<=2 
replace hh_intval=2 if  hh_age>=3 & hh_age<=4 
replace hh_intval=3 if  hh_age>=5 & hh_age<=6 
replace hh_intval=4 if  hh_age>=7 & hh_age<=10 

sort hh_id year   

*- male age at divorce
gen male_ageat_div=maleage[_n-1]+1 if couple_divorces==1
*- age of household at divorce
gen agediv=.
replace agediv=1 if male_ageat_div<=25
replace agediv=2 if male_ageat_div>=26 & male_ageat_div<=28
replace agediv=3 if male_ageat_div>=29 & male_ageat_div<=31
replace agediv=4 if male_ageat_div>=32 & male_ageat_div<=34
replace agediv=5 if male_ageat_div>=35 & male_ageat_div<=37
replace agediv=6 if male_ageat_div>=38 & male_ageat_div<=40
replace agediv=7 if male_ageat_div>=41 & male_ageat_div<=43
replace agediv=8 if male_ageat_div>=44 & male_ageat_div<=46
replace agediv=9 if male_ageat_div>=47 & male_ageat_div<=49
replace agediv=10 if male_ageat_div>=50 
replace agediv=. if male_ageat_div==.

bysort hh_id: egen age_at_div=min(agediv) 

bysort hh_id: egen male_ageat_lastmarr=max(maleage) if ever_divorced==0 
replace male_ageat_lastmarr=male_ageat_div-1 if ever_divorced==1 

gen agelastmarr=.
replace agelastmarr=1 if male_ageat_lastmarr<=25
replace agelastmarr=2 if male_ageat_lastmarr>=26 & male_ageat_lastmarr<=28
replace agelastmarr=3 if male_ageat_lastmarr>=29 & male_ageat_lastmarr<=31
replace agelastmarr=4 if male_ageat_lastmarr>=32 & male_ageat_lastmarr<=34
replace agelastmarr=5 if male_ageat_lastmarr>=35 & male_ageat_lastmarr<=37
replace agelastmarr=6 if male_ageat_lastmarr>=38 & male_ageat_lastmarr<=40
replace agelastmarr=7 if male_ageat_lastmarr>=41 & male_ageat_lastmarr<=43
replace agelastmarr=8 if male_ageat_lastmarr>=44 & male_ageat_lastmarr<=46
replace agelastmarr=9 if male_ageat_lastmarr>=47 & male_ageat_lastmarr<=49
replace agelastmarr=10 if male_ageat_lastmarr>=50 
replace agelastmarr=. if male_ageat_lastmarr==.

bysort hh_id: egen duration=min(agelastmarr)

replace couple_divorces=0 if couple==1 & couple_divorces==.
replace couple_divorces=. if ever_divorced==1 & year> min_yeardiv 
gen hh_age_wdiv=hh_age
replace hh_age_wdiv=age_at_div if couple_divorces==1
forvalues t=1(1)10{
bysort hh_id hh_age_wdiv: egen dh_`t'=max(couple_divorces) if hh_age_wdiv==`t' 
																		 
																		 
}

forvalues t=1(1)10{
bysort hh_id: egen dh_t`t'=mean(dh_`t') 
}

forvalues i=1(1)4{
bysort hh_id hh_intval: egen dh_int`i'=max(couple_divorces) if hh_intval==`i' 
}

forvalues i=1(1)4{
bysort hh_id: egen dh_I`i'=mean(dh_int`i') 
}

*- frequency of stay-at-home wife 

gen hw=.
replace hw=1 if hrs_wk_annual==0 & hrs_wk_annual!=. &gender==2 
replace hw=0 if hrs_wk_annual>0 & hrs_wk_annual!=. &gender==2
replace hw=1 if s_hrs_wk_annual==0 & s_hrs_wk_annual!=. &s_gender==2 
replace hw=0 if s_hrs_wk_annual>0 & s_hrs_wk_annual!=. &s_gender==2
replace hw=. if s_hrs_wk_annual==9999&s_gender==2 
replace hw=. if hrs_wk_annual==9999&gender==2 
replace hw=. if couple==1 & first_marriage==. 
bysort hh_id: egen sahw=mean(hw) 

forvalues t=1(1)10{
bysort hh_id hh_age: egen sahw_`t'=mean(hw) if hh_age==`t'
}
forvalues t=1(1)10{
bysort hh_id: egen sahw_t`t'=mean(sahw_`t') 
}

forvalues i=1(1)4{
bysort hh_id hh_intval: egen sahw_int`i'=mean(hw) if hh_intval==`i'
}

forvalues i=1(1)4{
bysort hh_id: egen sahw_I`i'=mean(sahw_int`i') 
}

*- sort and order
sort couple hh_id sample_person year
order hh_id sample_person Nbr_obs year state hh_age couple first_marriage man woman marit max_marit min_marit year_marriage min_yearmarr couple_marries ever_divorced year_divorce min_yeardiv couple_divorces age_at_div duration sahw* dh*

* Types of couples

gen couple_type=.
replace couple_type=1 if fem_inihk==1 & male_inihk==1
replace couple_type=2 if fem_inihk==1 & male_inihk==2
replace couple_type=3 if fem_inihk==1 & male_inihk==3
replace couple_type=4 if fem_inihk==2 & male_inihk==1
replace couple_type=5 if fem_inihk==2 & male_inihk==2
replace couple_type=6 if fem_inihk==2 & male_inihk==3
replace couple_type=7 if fem_inihk==3 & male_inihk==1
replace couple_type=8 if fem_inihk==3 & male_inihk==2
replace couple_type=9 if fem_inihk==3 & male_inihk==3
replace couple_type=10 if single==1&max_femeduc_cat==1&obs==1
replace couple_type=11 if single==1&max_femeduc_cat==2&obs==1
replace couple_type=12 if single==1&max_femeduc_cat==3&obs==1
replace couple_type=13 if single==1&max_maleduc_cat==1&obs==1
replace couple_type=14 if single==1&max_maleduc_cat==2&obs==1
replace couple_type=15 if single==1&max_maleduc_cat==3&obs==1

bysort hh_id: egen couple_type2=max(couple_type)

drop couple_type
rename couple_type2 couple_type

* Organize and save
bysort couple_type: egen sahw2=mean(hw) 
bysort couple_type: egen sahw3=mean(sahw) 

gen observations=.
gen households=.

forvalues i=1(1)15{
forvalues r=1(1)4{
count if couple_type==`i' & USCBregion==`r'
replace observations=r(N) if couple_type==`i' & USCBregion==`r'
count if couple_type==`i' & obs==1 & USCBregion==`r'
replace households=r(N) if couple_type==`i' & USCBregion==`r'
}
}

save data_for_earnings-moments.dta, replace 

keep if couple_marries==1 | single==1&obs==1 
drop if couple_type==.
save data_for_moments.dta, replace


*- save sample size data

order USCBregion couple_type households observations  
keep USCBregion couple_type households observations  
duplicates drop 
sort USCBregion couple_type
outsheet using "sample_sizes_4mkts.csv", comma replace

save sample_sizes_4mkts, replace

}							  
									  

